library("RColorBrewer")
library("lubridate")
library("dplyr")

rp<-read.csv('../data/final_analysis_rugpull.csv', head=T, quote = '"')
rp$btctalkintrodate<- ymd(rp$btctalkintrodate)
rp$btctalkrugpulldate <- ymd(rp$btctalkrugpulldate)
rp$ym <- format(rp$btctalkrugpulldate, "%Y%m")

# Figfor rugpull categories over time after 2020
rpafter2020<-data.frame(rp[which(rp$ym!=201601 & rp$ym!=201803 & rp$ym!=201811),])
dom2<- c("202001","202002","202003","202004","202005","202006","202007","202008","202009","202010","202011","202012","202101","202102","202103","202104","202105","202106","202107","202108","202109","202110","202111","202112","202201","202202","202203","202204","202205","202206","202207","202208", "202209")
rpafter2020$ym <- factor(rpafter2020$ym, levels=dom2)
cattab1 <- table(rpafter2020$service,rpafter2020$ym)

dev.new(width=10,height=8,unit="in")
par(mar=c(5,7,4,4))
barplot(cattab1,col=brewer.pal(6,"Dark2"),angle=c(0,30,60,90,120,150),density=15,border=T,ylab='# of Rugpulls reported', axisnames=F,cex.axis = 1.5, cex.lab=2)
axis(side=1,at = c(0,14.5,29,43.5), labels=c("2020","2021","2022",""), cex.axis=1.5)
mtext("Date", side = 1, line = 3, cex=2) 
legend('topleft',c("Cloud mining","Exchange","ICO","IDO","NFT","Yield farm"),col=brewer.pal(6,"Dark2"),angle=c(0,30,60,90,120,150),density=15,text.col=brewer.pal(6,"Dark2"),fill=brewer.pal(6,"Dark2"),border=brewer.pal(6,"Dark2"))

#cdf for active time of rugpulls 
rp$totalactivetime<-rp$btctalkrugpulldate-rp$btctalkintrodate

# square ecdf plot
dev.new(width=10,height=10,unit="in")
par(mar=c(5,7,4,4))
plot(ecdf(rp$totalactivetime), xlab="Days", ylab="Fraction of Rugpulls active after x Days", do.points=FALSE, xlim=c(0,1500),verticals=TRUE, main="", cex.lab=2, cex.axis=2)

#btc and eth data 
btc<-read.csv('../data/bitcoin_2014-01-01_2022-09-30.csv', head=T, quote = '"')
btc$Date<-mdy(btc$Date)
btc$ym<-format(btc$Date,'%Y%m')

eth<-read.csv('../data/ethereum_2016-01-01_2022-09-30.csv', head=T, quote = '"')
eth$Date<-mdy(eth$Date)
eth$ym<-format(eth$Date,'%Y%m')

rugpulls<-data.frame(table(rp$ym))
names(rugpulls) <- c("month", "freq")
rugpulls$month<-ym(rugpulls$month)

df3<-data.frame(month=as.Date(c("2014-07-01","2014-08-01","2014-09-01","2014-10-01","2014-11-01","2014-12-01","2015-01-01","2015-02-01","2015-03-01","2015-04-01","2015-05-01","2015-06-01","2015-07-01","2015-08-01","2015-09-01","2015-10-01","2015-11-01","2015-12-01","2016-01-01","2016-02-01","2016-03-01","2016-04-01","2016-05-01","2016-06-01","2016-07-01","2016-08-01","2016-09-01","2016-10-01","2016-11-01","2016-12-01","2017-01-01","2017-02-01","2017-03-01","2017-04-01","2017-05-01","2017-06-01","2017-07-01","2017-08-01","2017-09-01","2017-10-01","2017-11-01","2017-12-01","2018-01-01","2018-02-01","2018-03-01","2018-04-01","2018-05-01","2018-06-01","2018-07-01","2018-08-01","2018-09-01","2018-10-01","2018-11-01","2018-12-01","2019-01-01","2019-02-01","2019-03-01","2019-04-01","2019-05-01","2019-06-01","2019-07-01","2019-08-01","2019-09-01","2019-10-01","2019-11-01","2019-12-01","2020-01-01","2020-02-01","2020-03-01","2020-04-01","2020-05-01","2020-06-01","2020-07-01","2020-08-01","2020-09-01","2020-10-01","2020-11-01","2020-12-01","2021-01-01","2021-02-01","2021-03-01","2021-04-01","2021-05-01","2021-06-01","2021-07-01","2021-08-01","2021-09-01","2021-10-01","2021-11-01","2021-12-01","2022-01-01","2022-02-01","2022-03-01","2022-04-01","2022-05-01","2022-06-01","2022-07-01","2022-08-01","2022-09-01")),freq=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))

rugpulls<-merge(rugpulls, df3, by = "month", all = TRUE)
rugpulls$freq <- rugpulls$freq.x
rugpulls$freq.x <- NULL
rugpulls$freq.y <- NULL
rugpulls$freq <- replace(rugpulls$freq, is.na(rugpulls$freq), 0)
rugpulls$freq <- rugpulls$freq+1

# plot btc, eth and rugpulls for 2020-2022

par(mar=c(5,7,7,5))
plot(btc$Date, btc$Close, type="l", col="blue", ylim=c(100,65000), xlim=c(as.Date("2020-01-01"), as.Date("2022-09-25")),ylab="Price of BTC in USD", xlab = "Date", lty=1, log='y', xaxt="n", cex.lab=1.5, cex.lab=2)
axis.Date(1, at=seq(as.Date("2020-01-01"), as.Date("2022-01-01"), by="12 mon"), format="%Y", cex.axis=1.5)
par(new=TRUE)
plot(eth$Date, eth$Close, type="l", col="brown", ylim=c(100,65000), xlim=c(as.Date("2020-01-01"), as.Date("2022-09-25")), axes=FALSE, ylab="", xlab = "", lty=1, log="y")
par(new=TRUE)
plot(rugpulls$month, rugpulls$freq, type="l", ylim=c(1,500), xlim=c(as.Date("2020-01-01"), as.Date("2022-09-25")), col="magenta", axes = FALSE, lty=1, xlab = "", ylab = "", log="y")
axis(side=4,at = pretty(range(rugpulls$freq)),cex.axis=1.5)
mtext("# of Rugpulls", side = 4, line = 3, cex=2) 
legend('topleft',legend=c("BTC-USD","ETH-USD","Rugpulls"), col=c("blue","brown", "magenta"), lty=c(1,1,1), ncol=1)

# aggregator ico data
coincodex<-read.csv('../data/coincodex_ico_startdate.csv', head=T, quote = '"')
cmc<-read.csv('../data/cmc_start_dates.csv', head=T, quote = '"')
cmcicos<-cmc[cmc$Category=='ICO',]

#rugpull data
df1<-data.frame(rp[rp$service=='ICO',])
df1<-data.frame(table(df1$ym))
df1$Var1<-ym(df1$Var1)

coincodex$startdate<-ymd(coincodex$startdate) # 57 do not have a start date
cmcicos$startdate <- ymd(cmcicos$startdate)

cmcicos$ym <- format(cmcicos$startdate,'%Y%m')
cmcicos<-subset(cmcicos, select = -c(Category)) # remove extra column saying ICOs
coincodex$ym <- format(coincodex$startdate,'%Y%m')

#merge both introduced icos
cmcicos$Name<-tolower(cmcicos$Name)
coincodex$Name<-tolower(coincodex$Name)

totalicos<-merge(x=coincodex, y=cmcicos, all=TRUE)
freqicos<-data.frame(table(totalicos$ym))
freqicos$Var1<-ym(freqicos$Var1)

names(freqicos) <- c("month", "freq")
names(df1) <- c("month", "freq")

freqicos<-merge(x=freqicos, y=df3, by="month", all=TRUE)
freqicos$freq <- freqicos$freq.x
freqicos$freq.x <- NULL
freqicos$freq.y <- NULL
freqicos$freq[is.na(freqicos$freq)] <- 0

df1<-merge(x = df1, y = df3, by = "month", all = TRUE)
df1$freq <- df1$freq.x
df1$freq.x <- NULL
df1$freq.y <- NULL
df1$freq[is.na(df1$freq)] <- 0

# using the total data
par(mar=c(5,7,7,5))
plot(freqicos$month, freqicos$freq, type="l", col="blue", ylab="# of ICOs introduced", xlab = "Date", lty=1, cex.lab=2, cex.axis=1.5, xlim=c(as.Date('2017-01-01'),max(freqicos$month))) #total data
par(new=TRUE)
plot(df1$month, df1$freq, type="l", col="red", axes = FALSE, lty=2, xlab = "", ylab = "",xlim=c(as.Date('2017-01-01'),max(freqicos$month)))
axis(side=4,at = pretty(range(df1$freq)),cex.axis=1.5)
mtext("# of Rugpulled ICOs", side = 4, line = 3, cex=2)
legend('topleft',legend=c("ICOs introduced","ICOs rugpulled"), col=c("blue","red"), lty=c(1,2), ncol=1)

# Plot for number of threads with keywords exit scams and rugpulls over time 
esthread<-data.frame(year=c('201801','201901','202001','202101','202201'), threads=c(230,232,251,138,89))
rpthread<-data.frame(year=c('201801','201901','202001','202101','202201'), threads=c(0,6,49,148,137))
totalthread<-data.frame(year=c('201801','201901','202001','202101','202201'), threads=c(230,238,300,286,226))
icothreads<-data.frame(year=c('201801','201901','202001','202101','202201'), threads=c(114, 93, 125, 71, 59))
icothreads$year<-ym(icothreads$year)
rpthread$year<-ym(rpthread$year)
esthread$year<-ym(esthread$year)
totalthread$year<-ym(totalthread$year)

par(mar=c(5,7,7,5))
plot(esthread$year, esthread$threads, type='l', xlab='Date', ylab = '# of threads', col='brown', ylim=c(0,300), xlim=c(min(esthread$year),max(esthread$year)), cex.lab=2, cex.axis=1.5)
par(new=TRUE)
plot(rpthread$year, rpthread$threads, type='l', axes=FALSE, ylab="", xlab = "", col='blue', ylim=c(0,300), xlim=c(min(esthread$year),max(esthread$year)))
par(new=TRUE)
plot(totalthread$year, totalthread$threads, type='l', axes=FALSE, ylab="", xlab = "", col='magenta', ylim=c(0,300), xlim=c(min(esthread$year),max(esthread$year)))
par(new=TRUE)
plot(icothreads$year, icothreads$threads, type = 'l', axes=FALSE, ylab="", xlab = "", col='black', ylim=c(0,300), xlim=c(min(esthread$year),max(esthread$year)), lty=2)
legend(min(totalthread$year),220,legend = c("exit scam threads", "rugpull threads", "total threads", "ico threads"), col = c("brown", "blue","magenta","black"), lty = c(1,1,1,2), cex=1.4, bty='n')

# spearman correlation between eth value and frequencies of rugpull per month - Section 4.2
# data for eth, freq of rugpulls and their differences per month with 34 data points
rpeth<-data.frame(month=c(202001,202002,202003,202004,202005,202006,202007,202008,202009,202010,202011,202012,202101,202102,202103,202104,202105,202106,202107,202108,202108,202109,202110,202111,202112,202201,202202,202203,202204,202205,202206,202207,202208,202209),freq=c(0,1,0,0,0,0,0,0,1,5,4,0,0,1,17,11,4,3,3,3,3,2,6,3,1,4,7,2,2,13,1,2,2,0),value=c(144.4125,228.1848,200.6159,170.1788,210.6112,246.3781,246.4939,379.4443,355.3433,340.6617,416.2388,595.5485,1040.7892,1662.0745,1533.4794,2082.7500,3242.7177,2859.2200,2232.1733,2614.3574,3797.4873,3307.9194,4297.5317,4641.6457,3724.1063,2598.4454,2938.1166,3389.6567,2817.7267,1827.2122,1147.7290,1736.8710,1512.3606,1347.6918),valuechange=c(0,83.7723,-27.5689,-30.4371,40.4324,35.7669,0.1158,132.9504,-24.1010,-14.6816,75.5771,179.3097,445.2407,621.2853,-128.5951,549.2706,1159.9677,-383.4977,-627.0467,382.1841,1183.1299,-489.5679,989.6123,344.1140,-917.5394,-1125.6609,339.6712,451.5401,-571.9300,-990.5145,-679.4832,589.1420,-224.5104,-164.6688),freqchange=c(0,1,-1,0,0,0,0,0,1,4,-1,-4,0,1,16,-6,-7,-1,0,0,0,-1,4,-3,-2,3,3,-5,0,11,-12,1,0,-2))
res<-cor.test(rpeth$value, rpeth$freq, method="spearman",exact=FALSE)
res

# We test the same for 971 data points i.e. using daily ETH price exchange
ethdaily<-read.csv('../data/ethereum_2020-01-01_2022-08-31.csv', head=T, quote = '"')
df3<-data.frame(Date=c(seq(as.Date("2020-01-01"), as.Date("2022-08-30"), by="days")),Freq=c(seq(0,0)))
df2<-data.frame(table(rp$btctalkrugpulldate))
names(df2) <- c("Date", "Freq")
df2$Date<-ymd(df2$Date)
df2<-data.frame(df2[which(df2$Date!=as.Date('2016-01-15') & df2$Date!=as.Date('2018-03-07') & df2$Date!=as.Date('2018-11-09')),])
df2<-merge(df2,df3,by="Date", all=TRUE)
df2$Freq <- replace(df2$Freq.x, is.na(df2$Freq.x), 0)
ethdaily$Date<-mdy(ethdaily$Date)
ethdaily<-ethdaily[!duplicated(ethdaily$Date), ]
ethdaily<-merge(ethdaily,df3,by="Date", all=TRUE) # 2 dates are found with no data

res<-cor.test(ethdaily$Close, df2$Freq, method="spearman",exact=FALSE)
res
